Cream of the Crop 25

home *** CD-ROM | disk | FTP | other *** search

/ Cream of the Crop 25 / Cream of the Crop 25.iso / os2 / gnuwget.zip / wget-1.4.3 / src / http.c < prev next >

Wrap

C/C++ Source or Header | 1997-02-09 | 33KB | 1,212 lines

/* HTTP support. Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifdef HAVE_CONFIG_H # include <config.h> #endif /* HAVE_CONFIG_H */ #include <stdio.h> #include <stdlib.h> #include <sys/types.h> #ifdef HAVE_UNISTD_H # include <unistd.h> #endif #include <sys/stat.h> #ifdef HAVE_STRING_H # include <string.h> #else # include <strings.h> #endif #include <ctype.h> #include <assert.h> #ifdef WINDOWS # include <winsock.h> #endif #include "wget.h" #include "options.h" #include "utils.h" #include "url.h" #include "host.h" #include "http.h" #include "retr.h" #include "connect.h" #include "mtch.h" #include "netrc.h" extern char *version_string; extern struct options opt; #ifndef errno extern int errno; #endif #ifndef h_errno extern int h_errno; #endif /* Function to fetch a header from socket/file descriptor fd. The header may be of arbitrary length, since the function allocates as much memory as necessary for the header to fit. Most errors are handled. The header may be terminated by LF or CRLF. If the character after LF is SP or HT (horizontal tab), the header spans to another line (continuation header), as per RFC2068. The trailing CRLF or LF are stripped from the header, and it is zero-terminated. 
*/ uerr_t fetch_next_header(int fd, char **hdr) { int i, bufsize, res; char next; bufsize = DYNAMIC_LINE_BUFFER; *hdr = (char *)nmalloc(bufsize); for (i = 0; 1; i++) { if (i > bufsize - 1) *hdr = (char *)nrealloc(*hdr, (bufsize <<= 1)); res = buf_readchar(fd, *hdr + i); if (res == 1) { if ((*hdr)[i] == '\n') { if (!(i == 0 || (i == 1 && (*hdr)[0] == '\r'))) { /* If the header is non-empty, we need to check if it continues on to the other line. We do that by getting the next character without actually downloading it (i.e. peeking it). */ res = buf_peek(fd, &next); if (res == 0) return HEOF; else if (res == -1) return HERR; /* If the next character is SP or HT, just continue. */ if (next == '\t' || next == ' ') continue; } /* The header ends. */ (*hdr)[i] = '\0'; /* Get rid of '\r'. */ if (i > 0 && (*hdr)[i - 1] == '\r') (*hdr)[i - 1] = '\0'; break; } } else if (res == 0) return HEOF; else return HERR; } #ifdef DEBUG if (opt.debug) fprintf(opt.lfile, "%s\n", *hdr); #endif return HOK; } /* Parse the HTTP status line. It is of format: HTTP-Version SP Status-Code SP Reason-Phrase The function returns the status-code, or -1 if the status line is malformed. The pointer to reason-phrase is returned in rp. */ int hparsestatline(const char *hdr, const char **rp) { int mjr, mnr; /* HTTP major and minor version. */ int statcode; /* HTTP status code. */ const char *p; *rp = NULL; /* The standard format of HTTP-Version is: HTTP/x.y, where x is major version, and y is minor version. */ if (strncmp(hdr, "HTTP/", 5) != 0) return -1; hdr += 5; p = hdr; for (mjr = 0; isdigit(*hdr); hdr++) mjr = 10 * mjr + (*hdr - '0'); if (*hdr != '.' || p == hdr) return -1; ++hdr; p = hdr; for (mnr = 0; isdigit(*hdr); hdr++) mnr = 10 * mnr + (*hdr - '0'); if (*hdr != ' ' || p == hdr) return -1; /* Wget will accept only 1.0 and higher HTTP-versions. The value of minor version can be safely ignored. */ if (mjr < 1) return -1; /* Skip the space. 
*/ ++hdr; if (!(isdigit(*hdr) && isdigit(hdr[1]) && isdigit(hdr[2]))) return -1; statcode = 100 * (*hdr - '0') + 10 * (hdr[1] - '0') + (hdr[2] - '0'); /* RFC2068 requires a SPC here, even if there is no reason-phrase. As some servers/CGI are (incorrectly) setup to drop the SPC, we'll be liberal and allow the status line to end here. */ if (hdr[3] != ' ') { if (!hdr[3]) *rp = hdr + 3; else return -1; } else *rp = hdr + 4; return statcode; } /* Skip LWS (linear white space), if present. Returns number of characters to skip. */ int hskip_lws(const char *hdr) { int i; for (i = 0; *hdr == ' ' || *hdr == '\t' || *hdr == '\r' || *hdr == '\n'; ++hdr) ++i; return i; } /* Return the content length of the document body, if this is Content-length header, -1 otherwise. */ long hgetlen(const char *hdr) { static const int l = 15; /* strlen("content-length:") */ long len; if (strncasecmp(hdr, CONTLEN_H, l)) return -1; hdr += (l + hskip_lws(hdr + l)); if (!*hdr) return -1; if (!isdigit(*hdr)) return -1; for (len = 0; isdigit(*hdr); hdr++) len = 10 * len + (*hdr - '0'); return len; } /* Return the content-range in bytes, as returned by the server, if this is Content-range header, -1 otherwise. */ long hgetrange(const char *hdr) { static const int l = 14; /* strlen("content-range:") */ long len; if (strncasecmp(hdr, CONTRANGE_H, l)) return -1; hdr += (l + hskip_lws(hdr + l)); if (!*hdr) return -1; /* Nutscape proxy server sends content-length without "bytes" specifier, which is a breach of HTTP/1.1 draft. But heck, I must support it... */ if (!strncasecmp(hdr, "bytes", 5)) { hdr += 5; hdr += hskip_lws(hdr); if (!*hdr) return -1; } if (!isdigit(*hdr)) return -1; for (len = 0; isdigit(*hdr); hdr++) len = 10 * len + (*hdr - '0'); return len; } /* Returns the malloc-ed copy of the type of the header hdr, to the first ';', or NULL if the header does not begin with CONTTYPE_H string. 
*/ char * hgettype(const char *hdr) { static const int l = 13; /* strlen("content-type:") */ char *type; if (strncasecmp(hdr, CONTTYPE_H, l)) return NULL; hdr += (l + hskip_lws(hdr + l)); if ((type = strrchr(hdr, ';')) != NULL) *type = '\0'; return nstrdup(hdr); } /* Returns a malloc-ed copy of the location of the document, if the string hdr begins with LOCATION_H, or NULL. */ char * hgetlocation(const char *hdr) { static const int l = 9; /* strlen("location:") */ if (strncasecmp(hdr, LOCATION_H, l)) return NULL; hdr += (l + hskip_lws(hdr + l)); return nstrdup(hdr); } /* Returns a malloc-ed copy of the last-modified date of the document, if the hdr begins with LASTMODIFIED_H. */ char * hgetmodified(const char *hdr) { static const int l = 14; /* strlen("last-modified:") */ if (strncasecmp(hdr, LASTMODIFIED_H, l)) return NULL; hdr += (l + hskip_lws(hdr + l)); return nstrdup(hdr); } /* Returns 1 if the header is accept-ranges, and it contains the word "none", 0 otherwise. */ int haccepts_none(const char *hdr) { static const int l = 14; /* strlen("accept-ranges:") */ if (strncasecmp(hdr, ACCEPTRANGES_H, l)) return 0; hdr += (l + hskip_lws(hdr + l)); if (strstr(hdr, "none")) return 1; else return 0; } /* Retrieves a document through HTTP protocol. It recognizes status code, and correctly handles redirections. It closes the network socket. If it receives an error from the functions below it, it will print it if there is enough information to do so (almost always), returning the error to the caller (i.e. http_loop). Various HTTP parameters are stored to hs. Although it parses the response code correctly, it is not used in a sane way. The caller can do that, though. If u->proxy is non-NULL, the URL u will be taken as a proxy URL, and u->proxy->url will be given to the proxy server (bad naming, I'm afraid). 
*/
/* Notes on gethttp:

   - hs->restval is read here (it selects the Range header and the
     fopen mode), so the caller must set it before the call.
   - On return hs->error, hs->newloc and hs->remote_time are each
     either NULL or a heap string; a string that is freed here is
     reset to NULL so that the caller never sees a dangling pointer.
   - hs->statcode is -1 until a status line has been parsed.  */
uerr_t
gethttp(urlinfo *u, http_stat_t *hs, int *dt)
{
  char *request, *hdr, *type, *command, *path;
  char *user, *passwd;
  const char *error;
  char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost;
  char *all_headers;
  int sock, hcount, num_written, all_length, remport, statcode;
  long contlen, contrange;
  urlinfo *ou;
  uerr_t err;
  FILE *fp;

  /* Let the others worry about local filename...  */
  if (!(*dt & HEAD_ONLY))
    assert(u->local != NULL);

  /* Initialize certain elements of struct hstat.  */
  hs->len = 0L;
  hs->contlen = -1;
  hs->res = -1;
  hs->statcode = -1;
  hs->newloc = NULL;
  hs->remote_time = NULL;
  hs->error = NULL;

  /* Which structure to use to yield the original URL data.  */
  if (u->proxy)
    ou = u->proxy;
  else
    ou = u;

  /* First: establish the connection.  */
  if (opt.verbose)
    fprintf(opt.lfile, "Connecting to %s:%hu... ", u->host, u->port);
  err = make_connection(&sock, u->host, u->port);
  switch (err)
    {
    case HOSTERR:
      if (!opt.quiet)
        {
          if (opt.verbose)
            fprintf(opt.lfile, "\n");
          fprintf(opt.lfile, "%s: %s.\n", u->host, herrmsg(h_errno));
        }
      return HOSTERR;
    case CONSOCKERR:
      if (!opt.quiet)
        {
          if (opt.verbose)
            fprintf(opt.lfile, "\n");
          fprintf(opt.lfile, "socket: %s\n", mystrerror(errno));
        }
      return CONSOCKERR;
    case CONREFUSED:
      if (!opt.quiet)
        {
          if (opt.verbose)
            fprintf(opt.lfile, "\n");
          fprintf(opt.lfile, "Connection to %s:%hu refused.\n",
                  u->host, u->port);
        }
      CLOSE(sock);
      return CONREFUSED;
    case CONERROR:
      if (!opt.quiet)
        {
          if (opt.verbose)
            fprintf(opt.lfile, "\n");
          fprintf(opt.lfile, "connect: %s\n", mystrerror(errno));
        }
      CLOSE(sock);
      return CONERROR;
    case NOCONERROR:
      /* Everything is fine!  */
      if (opt.verbose)
        fprintf(opt.lfile, "connected!\n");
      break;
    default:
      assert(0);
      break;
    } /* switch */

  if (u->proxy)
    path = u->proxy->url;
  else
    path = u->path;
  command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";

  referer = NULL;
  if (ou->referer)
    {
      /* 9 == strlen("Referer: "), 3 == CRLF plus the final NUL.  */
      referer = (char *)nmalloc(9 + strlen(ou->referer) + 3);
      sprintf(referer, "Referer: %s\r\n", ou->referer);
    }

  if (*dt & SEND_NOCACHE)
    pragma_h = "Pragma: no-cache\r\n";
  else
    pragma_h = "";

  if (hs->restval)
    {
      /* 13 == strlen("Range: bytes="), 4 == "-" CRLF plus NUL.  */
      range = (char *)nmalloc(13 + numdigit(hs->restval) + 4);
      sprintf(range, "Range: bytes=%ld-\r\n", hs->restval);
    }
  else
    range = NULL;

  useragent = opt.useragent ? opt.useragent : version_string;

  /* Construct the authentication, if userid is present.  */
  user = ou->user;
  passwd = ou->passwd;
  search_netrc(u->host, (const char **)&user, (const char **)&passwd, 0);
  user = user ? user : opt.http_user;
  passwd = passwd ? passwd : opt.http_passwd;
  if (user && passwd)
    {
      char *t1, *t2;

      /* "user:passwd" plus the terminating NUL.  Sizing this from
         2 * strlen(passwd) would be one byte short for an empty
         password.  */
      t1 = (char *)nmalloc(strlen(user) + 1 + strlen(passwd) + 1);
      sprintf(t1, "%s:%s", user, passwd);
      t2 = base64_encode_line(t1);
      free(t1);
      /* 24 == strlen("Authorization: Basic ") + CRLF + NUL.  */
      wwwauth = (char *)nmalloc(strlen(t2) + 24);
      sprintf(wwwauth, "Authorization: Basic %s\r\n", t2);
      free(t2);
    }
  else
    wwwauth = NULL;

  remhost = ou->host;
  remport = ou->port;

  /* Allocate the memory for the request.  The trailing 60 covers the
     fixed text of the format string below and the final NUL.  */
  request = (char *)nmalloc(strlen(command) + strlen(path)
                            + strlen(useragent)
                            + strlen(remhost) + numdigit(remport)
                            + strlen(HTTP_ACCEPT)
                            + (referer ? strlen(referer) : 0)
                            + (wwwauth ? strlen(wwwauth) : 0)
                            + (range ? strlen(range) : 0)
                            + strlen(pragma_h)
                            + (opt.user_header ? strlen(opt.user_header) : 0)
                            + 60);
  /* Construct the request.  */
  sprintf(request, "%s %s HTTP/1.0\r\nUser-Agent: %s\r\nHost: %s:%d\r\nAccept: %s\r\n%s%s%s%s%s\r\n",
          command, path, useragent, remhost, remport, HTTP_ACCEPT,
          referer ? referer : "",
          wwwauth ? wwwauth : "",
          range ? range : "",
          pragma_h,
          opt.user_header ? opt.user_header : "");

  /* Free the temporary memory.  */
  if (referer)
    free(referer);
  if (range)
    free(range);
  if (wwwauth)
    free(wwwauth);
  DEBUGP(request);

  /* Send the request to server.  */
  num_written = iwrite(sock, request, strlen(request));
  if (num_written != (int)strlen(request))
    {
      if (opt.verbose)
        fprintf(opt.lfile, "Failed writing HTTP request.\n");
      free(request);
      CLOSE(sock);
      return WRITEFAILED;
    }
  if (opt.verbose)
    fprintf(opt.lfile, "%s request sent, fetching headers... ",
            u->proxy ? "HTTP proxy" : "HTTP");
  free(request);

  contlen = contrange = -1;
  type = NULL;
  statcode = -1;
  *dt &= ~RETROKF;

  /* Since this is a new connection, we may safely discard anything
     left in the buffer.  */
  buf_discard();

  all_headers = NULL;
  all_length = 0;
  /* Header-fetching loop.  */
  hcount = 0;
  for (;;)
    {
      ++hcount;
      /* Get the header.  */
      err = fetch_next_header(sock, &hdr);
      /* Check for errors.  */
      if (err == HEOF)
        {
          if (!opt.quiet)
            {
              if (opt.verbose)
                fprintf(opt.lfile, "\n");
              fprintf(opt.lfile, "End of file while parsing headers.\n");
            }
          free(hdr);
          if (type)
            free(type);
          if (hs->newloc)
            {
              free(hs->newloc);
              hs->newloc = NULL;    /* The caller inspects it.  */
            }
          if (all_headers)
            free(all_headers);
          CLOSE(sock);
          return HEOF;
        }
      else if (err == HERR)
        {
          if (!opt.quiet)
            {
              if (opt.verbose)
                fprintf(opt.lfile, "\n");
              fprintf(opt.lfile, "Read error (%s) in headers.\n",
                      mystrerror(errno));
            }
          free(hdr);
          if (type)
            free(type);
          if (hs->newloc)
            {
              free(hs->newloc);
              hs->newloc = NULL;    /* The caller inspects it.  */
            }
          if (all_headers)
            free(all_headers);
          CLOSE(sock);
          return HERR;
        }

      /* If the headers are to be saved to a file later, save them to
         memory now.  Each header is stored followed by '\n'; the
         empty terminating header is stored too.  */
      if (opt.save_headers)
        {
          int lh = strlen(hdr);
          all_headers = (char *)nrealloc(all_headers, all_length + lh + 2);
          memcpy(all_headers + all_length, hdr, lh);
          all_length += lh;
          all_headers[all_length++] = '\n';
          all_headers[all_length] = '\0';
        }

      /* Exit on empty header.  */
      if (!*hdr)
        {
          free(hdr);
          break;
        }

      /* Print the header if necessary.  */
      if (opt.verbose && opt.server_response)
        fprintf(opt.lfile, "\n%d %s", hcount, hdr);

      /* Check for errors documented in the first header.  */
      if (hcount == 1)
        {
          /* Parse the first line of server response.  */
          statcode = hparsestatline(hdr, &error);
          hs->statcode = statcode;
          /* Store the descriptive response.  */
          if (statcode == -1)   /* malformed request */
            hs->error = nstrdup("UNKNOWN");
          else if (!*error)
            hs->error = nstrdup("(no description)");
          else
            hs->error = nstrdup(error);
        }

      /* Try getting content-length.  */
      if (contlen == -1 && !opt.ignore_length)
        contlen = hgetlen(hdr);
      /* Try getting content-type.  */
      if (!type)
        type = hgettype(hdr);
      /* Try getting location.  */
      if (!hs->newloc)
        hs->newloc = hgetlocation(hdr);
      /* Try getting last-modified.  */
      if (!hs->remote_time)
        hs->remote_time = hgetmodified(hdr);
      /* Check for accept-ranges header.  If it contains the word
         `none', disable the ranges.  */
      if (*dt & ACCEPTRANGES)
        if (haccepts_none(hdr))
          *dt &= ~ACCEPTRANGES;
      /* Try getting content-range.  */
      if (contrange == -1)
        contrange = hgetrange(hdr);
      /* Free the current header.  */
      free(hdr);
    } /* for (;;) */

  /* A response that starts with an empty line never reaches the
     status-line branch above; give the caller a printable
     description anyway.  */
  if (!hs->error)
    hs->error = nstrdup("UNKNOWN");

  /* 20x responses are counted among successful by default.  */
  if (H_20X(statcode))
    *dt |= RETROKF;

  /* NOT text/html by default.  */
  if (type && !strncasecmp(type, TEXTHTML_S, strlen(TEXTHTML_S)))
    *dt |= TEXTHTML;
  else
    *dt &= ~TEXTHTML;

  if (contrange == -1)
    hs->restval = 0;
  /* NOTE(review): contrange is never -1 in this branch, so the
     H_PARTIAL clause cannot be true here; kept as in the original.  */
  else if (contrange != hs->restval
           || (H_PARTIAL(statcode) && contrange == -1))
    {
      /* This means the whole request was somehow misunderstood by
         the server.  Bail out.  */
      if (type)
        free(type);
      if (hs->newloc)
        {
          free(hs->newloc);
          hs->newloc = NULL;    /* The caller inspects it.  */
        }
      if (all_headers)
        free(all_headers);
      CLOSE(sock);
      return RANGEERR;
    }

  if (hs->restval)
    {
      if (contlen != -1)
        contlen += contrange;
      else
        contrange = -1;         /* If content-length was not sent,
                                   content-range will be ignored.  */
    }
  hs->contlen = contlen;

  if (opt.verbose)
    {
      if (!opt.server_response)
        fprintf(opt.lfile, "done.");
      fprintf(opt.lfile, "\n");
    }

  /* Return if redirected.  */
  if (H_REDIRECTED(statcode) || statcode == HTTP_MULTIPLE_CHOICES)
    {
      /* RFC2068 says that in case of the 300 (multiple choices)
         response, the server can output a preferred URL through
         `Location' header; otherwise, the request should be treated
         like GET.  So, if the location is set, it will be a
         redirection; otherwise, just proceed normally.  */
      if (statcode == HTTP_MULTIPLE_CHOICES && !hs->newloc)
        *dt |= RETROKF;
      else
        {
          fprintf(opt.lfile, "Location: %s%s\n",
                  hs->newloc ? hs->newloc : "unspecified",
                  hs->newloc ? " [following]" : "");
          CLOSE(sock);
          if (all_headers)
            free(all_headers);
          if (type)
            free(type);
          return NEWLOCATION;
        }
    }

  if (opt.verbose)
    {
      if ((*dt & RETROKF) && !opt.server_response)
        {
          /* No need to print this output if the body won't be
             downloaded at all, or if the original server response is
             printed.  */
          fprintf(opt.lfile, "Length: ");
          if (contlen != -1)
            {
              fprintf(opt.lfile, "%s", legible(contlen));
              if (contrange != -1)
                fprintf(opt.lfile, " (%s to go)",
                        legible(contlen - contrange));
            }
          else
            fprintf(opt.lfile, opt.ignore_length ? "ignored" : "unspecified");
          if (type)
            fprintf(opt.lfile, " [%s]\n", type);
          else
            fprintf(opt.lfile, "\n");
        }
    }
  if (type)
    free(type);
  type = NULL;                  /* We don't need it any more.  */

  /* Return if we have no intention of further downloading.  */
  if (!(*dt & RETROKF) || (*dt & HEAD_ONLY))
    {
      /* In case someone cares to look...  */
      hs->len = 0L;
      hs->res = 0;
      if (all_headers)
        free(all_headers);
      CLOSE(sock);
      return RETRFINISHED;
    }

  /* Open the local file.  */
  if (!opt.dfp)
    {
      mkalldirs(u->local);
      fp = fopen(u->local, hs->restval ? "ab" : "wb");
      if (!fp)
        {
          if (!opt.quiet)
            fprintf(opt.lfile, "%s: %s\n", u->local, mystrerror(errno));
          CLOSE(sock);
          if (all_headers)
            free(all_headers);
          return FOPENERR;
        }
    }
  else                          /* opt.dfp */
    fp = opt.dfp;

  if (opt.save_headers)
    fwrite(all_headers, 1, all_length, fp);
  reset_timer();
  /* Get the contents of the document.  */
  hs->res = get_contents(sock, fp, &hs->len, hs->restval, 0);
  hs->dltime = elapsed_time();
  if (!opt.dfp)
    fclose(fp);
  else
    fflush(fp);
  if (all_headers)
    free(all_headers);
  CLOSE(sock);
  if (hs->res == -2)
    return FWRITEERR;
  return RETRFINISHED;
}

/* The genuine HTTP loop!  This is the part where the retrieval is
   retried, and retried, and retried, and...

   *newloc is set to NULL on entry and, whenever the server sent a
   location, to a freshly allocated copy of it which the caller owns.
   *dt is reset here and then maintained by gethttp.  The function
   returns RETROK on success, NEWLOCATION on redirection, WRONGCODE
   on an unsuccessful response, the gethttp error for fatal errors,
   and TRYLIMEXC when opt.ntry attempts have been used up.  */
uerr_t
http_loop(urlinfo *u, char **newloc, int *dt)
{
  static int first_retrieval = 1;

  int count;
  int use_ts, got_head = 0;     /* Time-stamping info.  */
  char *tms, *suf, *locf, *tmrate;
  uerr_t err;
  time_t tml = -1, tmr = -1;    /* Local and remote time-stamps.  */
  long local_size = 0;          /* The size of the local file.  */
  http_stat_t hstat;            /* HTTP status.  */
  struct stat st;
  void my_touch PARAMS((char *, time_t));

  *newloc = NULL;

  /* Warn on wildcard usage in HTTP.  Don't use has_wildcards because
     it would also warn on '?', and we don't want that because of
     CGI.  */
  if (opt.verbose && strchr(u->url, '*'))
    fprintf(opt.lfile, "Warning: wildcards not supported in HTTP.\n");

  /* Determine the local filename.  */
  if (!u->local)
    {
      if (!opt.timestamping || opt.recursive)
        u->local = url_filename(u->proxy ? u->proxy : u);
      else                      /* opt.timestamping && !recursive */
        {
          if (*(u->proxy ? u->proxy->file : u->file))
            u->local = nstrdup(u->proxy ? u->proxy->file : u->file);
          else
            u->local = nstrdup("index.html");
        }
    }

  if (!opt.output_document)
    locf = u->local;
  else
    locf = opt.output_document;

  if (opt.noclobber && exists(u->local))
    {
      /* If opt.noclobber is turned on and file already exists, do
         not retrieve the file.  */
      if (opt.verbose)
        fprintf(opt.lfile, "File `%s' already there, will not retrieve.\n",
                u->local);
      /* If the file is there, we suppose it's retrieved OK.  */
      *dt |= RETROKF;
      /* If its suffix is "html" or (yuck!) "htm", we suppose it's
         text/html, a harmless lie.  */
      if (((suf = suffix(u->local)) != NULL)
          && (!strcmp(suf, "html") || !strcmp(suf, "htm")))
        *dt |= TEXTHTML;
      free(suf);
      /* Another harmless lie: */
      return RETROK;
    }

  use_ts = 0;
  if (opt.timestamping)
    {
      if (stat(u->local, &st) == 0)
        {
          use_ts = 1;
          tml = st.st_mtime;
          local_size = st.st_size;
          got_head = 0;
        }
    }

  /* Reset the counter.  */
  count = 0;
  *dt = 0 | ACCEPTRANGES;
  /* THE loop */
  do
    {
      /* Increment the pass counter.  */
      ++count;
      /* Wait before the retrieval (unless this is the very first
         retrieval).  */
      if (!first_retrieval && opt.wait)
        sleep(opt.wait);
      if (first_retrieval)
        first_retrieval = 0;
      /* Get the current time string.  */
      tms = time_str(NULL);
      /* Print fetch message, if opt.verbose.  */
      if (opt.verbose)
        {
          char *hurl = str_url(u->proxy ? u->proxy : u, 1);
          char tmp[15];

          strcpy(tmp, " ");
          if (count > 1)
            sprintf(tmp, "(try:%2d)", count);
          fprintf(opt.lfile, "--%s-- %s\n %s => `%s'\n",
                  tms, hurl, tmp, locf);
          free(hurl);
        }

      /* Default document type is empty.  However, if spider mode is
         on or time-stamping is employed, HEAD_ONLY commands is
         encoded within *dt.  */
      if (opt.spider || (use_ts && !got_head))
        *dt |= HEAD_ONLY;
      else
        *dt &= ~HEAD_ONLY;

      /* Assume no restarting.  */
      hstat.restval = 0L;
      /* Decide whether or not to restart.  */
      if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
          && exists(u->local))
        if (stat(u->local, &st) == 0)
          hstat.restval = st.st_size;

      /* Decide whether to send the no-cache directive.  */
      if (u->proxy && (count > 1 || (opt.proxy_cache == 0)))
        *dt |= SEND_NOCACHE;
      else
        *dt &= ~SEND_NOCACHE;

      /* Try fetching the document, or at least its head.  :-) */
      err = gethttp(u, &hstat, dt);
      /* Time?  */
      tms = time_str(NULL);
      /* Get the new location (with or without the redirection).  A
         copy left over from an earlier pass is released first.  */
      if (hstat.newloc)
        {
          if (*newloc)
            free(*newloc);
          *newloc = nstrdup(hstat.newloc);
        }

      switch (err)
        {
        case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
        case CONERROR: case READERR: case WRITEFAILED:
        case RANGEERR:
          /* Non-fatal errors continue executing the loop, which will
             bring them to "while" statement at the end, to judge
             whether the number of tries was exceeded.  */
          FREEHSTAT(hstat);
          printwhat(count, opt.ntry);
          continue;
        case HOSTERR: case CONREFUSED: case PROXERR:
          /* Fatal errors just return from the function.  */
          FREEHSTAT(hstat);
          return err;
        case FWRITEERR: case FOPENERR:
          /* Another fatal error.  */
          if (!opt.quiet)
            {
              if (opt.verbose)
                fprintf(opt.lfile, "\n");
              fprintf(opt.lfile, "Cannot write to `%s' (%s).\n",
                      u->local, mystrerror(errno));
            }
          FREEHSTAT(hstat);
          return err;
        case NEWLOCATION:
          /* Return the new location to the caller.  */
          if (!hstat.newloc)
            {
              if (!opt.quiet)
                fprintf(opt.lfile,
                        "ERROR: Redirection (%d) without location.\n",
                        hstat.statcode);
              FREEHSTAT(hstat);
              return WRONGCODE;
            }
          FREEHSTAT(hstat);
          return NEWLOCATION;
        case RETRFINISHED:
          /* Deal with you later.  */
          break;
        default:
          /* All possibilities should have been exhausted.  */
          assert(0);
        }

      if (!(*dt & RETROKF))
        {
          if (!opt.quiet)
            {
              fprintf(opt.lfile, "%s ERROR %d: %s.\n",
                      tms, hstat.statcode, hstat.error);
              if (opt.verbose)
                fputc('\n', opt.lfile);
            }
          FREEHSTAT(hstat);
          return WRONGCODE;
        }

      /* Did we get the time-stamp?  */
      if (!got_head)
        {
          if (opt.timestamping && !hstat.remote_time)
            {
              if (!opt.quiet)
                fprintf(opt.lfile,
                        "Last-modified header missing -- time-stamps turned off.\n");
            }
          else if (hstat.remote_time)
            {
              /* Convert the date-string into a time_t.  */
              tmr = http_atotm(hstat.remote_time);
              if (tmr == -1)
                if (opt.verbose)
                  fprintf(opt.lfile,
                          "Last-modified header invalid -- time-stamp ignored.\n");
            }
        }

      /* The time-stamping section.  */
      if (use_ts)
        {
          got_head = 1;
          *dt &= ~HEAD_ONLY;
          use_ts = 0;           /* No more time-stamping.  */
          count = 0;            /* The retrieve count for HEAD is
                                   reset.  */
          if (hstat.remote_time && tmr != -1)
            {
              /* Now time-stamping can be used validly.  Time-stamping
                 means that if the sizes of the local and remote file
                 match, and local file is newer than the remote file,
                 it will not be retrieved.  Otherwise, the normal
                 download procedure is resumed.  */
              if (local_size == hstat.contlen && tml >= tmr)
                {
                  if (opt.verbose)
                    fprintf(opt.lfile,
                            "Local file `%s' is more recent, not retrieving.\n\n",
                            u->local);
                  FREEHSTAT(hstat);
                  return RETROK;
                }
              else if (local_size != hstat.contlen)
                {
                  if (opt.verbose)
                    fprintf(opt.lfile,
                            "The sizes do not match (local %ld), retrieving.\n",
                            local_size);
                }
              else
                {
                  if (opt.verbose)
                    fprintf(opt.lfile, "Remote file is newer, retrieving.\n");
                }
            }
          FREEHSTAT(hstat);
          continue;
        } /* use_ts */

      if (!opt.dfp
          && (tmr != -1)
          && !opt.spider
          && ((hstat.len == hstat.contlen)
              || ((hstat.res == 0)
                  && ((hstat.contlen == -1)
                      || (hstat.len >= hstat.contlen && !opt.kill_longer)))))
        {
          my_touch(u->local, tmr);
        }
      /* End of time-stamping section.  */

      if (opt.spider)
        {
          fprintf(opt.lfile, "%d %s\n\n", hstat.statcode, hstat.error);
          FREEHSTAT(hstat);
          return RETROK;
        }

      /* It is now safe to free the remainder of hstat, since the
         strings within it will no longer be used.  Only its numeric
         members are read below.  */
      FREEHSTAT(hstat);

      tmrate = rate(hstat.len - hstat.restval, hstat.dltime);

      if (hstat.len == hstat.contlen)
        {
          if (*dt & RETROKF)
            {
              if (opt.verbose)
                fprintf(opt.lfile, "%s (%s) - `%s' saved [%ld/%ld]\n\n",
                        tms, tmrate, locf, hstat.len, hstat.contlen);
              else if (!opt.quiet)
                fprintf(opt.lfile, "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
                        tms, u->url, hstat.len, hstat.contlen, locf, count);
            }
          ++opt.numurls;
          opt.downloaded += hstat.len;
          return RETROK;
        }
      else if (hstat.res == 0)  /* No read error */
        {
          if (hstat.contlen == -1)
            /* We don't know how much we were supposed to get, so...  */
            {
              if (*dt & RETROKF)
                {
                  if (opt.verbose)
                    fprintf(opt.lfile, "%s (%s) - `%s' saved [%ld]\n\n",
                            tms, tmrate, locf, hstat.len);
                  else if (!opt.quiet)
                    fprintf(opt.lfile, "%s URL:%s [%ld] -> \"%s\" [%d]\n",
                            tms, u->url, hstat.len, locf, count);
                }
              ++opt.numurls;
              opt.downloaded += hstat.len;
              return RETROK;
            }
          else if (hstat.len < hstat.contlen)
            /* Meaning we lost the connection too soon */
            {
              if (opt.verbose)
                {
                  fprintf(opt.lfile,
                          "%s (%s) - Connection closed at byte %ld. ",
                          tms, tmrate, hstat.len);
                  printwhat(count, opt.ntry);
                }
              continue;
            }
          else if (!opt.kill_longer)
            /* Meaning we got more than expected */
            {
              if (opt.verbose)
                fprintf(opt.lfile, "%s (%s) - `%s' saved [%ld/%ld]\n\n",
                        tms, tmrate, locf, hstat.len, hstat.contlen);
              else if (!opt.quiet)
                fprintf(opt.lfile, "%s URL:%s [%ld/%ld] -> \"%s\" [%d]\n",
                        tms, u->url, hstat.len, hstat.contlen, locf, count);
              ++opt.numurls;
              opt.downloaded += hstat.len;
              return RETROK;
            }
          else
            /* The same, but not accepted */
            {
              if (opt.verbose)
                {
                  fprintf(opt.lfile,
                          "%s (%s) - Connection closed at byte %ld/%ld. ",
                          tms, tmrate, hstat.len, hstat.contlen);
                  printwhat(count, opt.ntry);
                }
              continue;
            }
        }
      else                      /* Now hstat.res can only be -1 */
        {
          if (hstat.contlen == -1)
            {
              if (opt.verbose)
                {
                  fprintf(opt.lfile,
                          "%s (%s) - Read error at byte %ld (%s).",
                          tms, tmrate, hstat.len, mystrerror(errno));
                  printwhat(count, opt.ntry);
                }
              continue;
            }
          else                  /* hstat.res == -1 and contlen is given */
            {
              if (opt.verbose)
                {
                  fprintf(opt.lfile,
                          "%s (%s) - Read error at byte %ld/%ld (%s). ",
                          tms, tmrate, hstat.len, hstat.contlen,
                          mystrerror(errno));
                  printwhat(count, opt.ntry);
                }
              continue;
            }
        }
      /* not reached */
      break;
    }
  while (!opt.ntry || (count < opt.ntry));
  return TRYLIMEXC;
}

/* Encode a zero-terminated string in base64.  Returns the malloc-ed
   encoded line, which the caller must free.  This is useful for HTTP
   only.  Note that the string may not contain NUL characters.

   The input is read as unsigned bytes and never beyond its
   terminating NUL: the bytes missing from the last group are taken
   as zero, and the output positions they would fill are replaced by
   '=' padding.  */
char *
base64_encode_line(const char *s)
{
  /* Conversion table.  */
  static char tbl[64] = {
    'A','B','C','D','E','F','G','H',
    'I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X',
    'Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n',
    'o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3',
    '4','5','6','7','8','9','+','/'
  };
  const unsigned char *in = (const unsigned char *)s;
  int len, i;
  char *res;
  unsigned char *p;

  len = strlen(s);
  res = (char *)nmalloc(4 * ((len + 2) / 3) + 1);
  p = (unsigned char *)res;
  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (i = 0; i < len; i += 3)
    {
      unsigned int b0 = in[i];
      unsigned int b1 = (i + 1 < len) ? in[i + 1] : 0;
      unsigned int b2 = (i + 2 < len) ? in[i + 2] : 0;

      *p++ = tbl[b0 >> 2];
      *p++ = tbl[((b0 & 3) << 4) | (b1 >> 4)];
      *p++ = tbl[((b1 & 0xf) << 2) | (b2 >> 6)];
      *p++ = tbl[b2 & 0x3f];
    }
  /* Pad the result if necessary...  */
  if (i == len + 1)
    *(p - 1) = '=';
  else if (i == len + 2)
    *(p - 1) = *(p - 2) = '=';
  /* ...and zero-terminate it.  */
  *p = '\0';
  return res;
}

/* Converts struct tm to time_t, assuming the data in tm is UTC rather
   than local timezone (as mktime assumes).  The local-time result of
   mktime is corrected by its distance from the value obtained when
   the same instant is broken down with gmtime and passed through
   mktime again.

   Contributed by Roger Beeman <beeman@cisco.com>.  */
time_t
mktime_from_utc(struct tm *t)
{
  time_t tl, tb;

  tl = mktime(t);
  tb = mktime(gmtime(&tl));
  return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
}

/* Converts ASCII time to time_t.  The time can be in three formats
   allowed for HTTP servers to send, as per RFC2068 -- RFC1123-date,
   RFC850-date or asctime-date.  strptime() is used to recognize
   various dates, which makes it a little bit slacker than the
   RFC1123/RFC850/asctime (e.g. it always allows shortened dates and
   months, one-digit days, etc.).  It also allows more than one space
   anywhere where the specs require one SP.  The routine should
   probably be even slacker (RFC2068 recommends this), but I do not
   have the time to write one.

   Returns the computed time_t representation, or -1 if all the
   schemes fail.  */
time_t
http_atotm(char *s)
{
  struct tm t;

  t.tm_isdst = -1;

  /* NOTE: We don't use `%n' for white space, as OSF's strptime uses
     it to eat all white space up to (and including) a newline, and
     the function fails (!) if there is no newline.

     Let's hope all strptime-s use ` ' to skip *all* whitespace
     instead of just one (it works that way on all the systems I've
     tested it on).  */

  /* Let's try RFC1123 date.  */
  if (strptime(s, "%a, %d %b %Y %T", &t))
    return mktime_from_utc(&t);
  /* RFC850 date.  */
  if (strptime(s, "%a, %d-%b-%y %T", &t))
    return mktime_from_utc(&t);
  /* asctime date.  */
  if (strptime(s, "%a %b %d %T %Y", &t))
    return mktime_from_utc(&t);
  /* Failure.  */
  return -1;
}